These procedures consist of importing the data set into the R environment, loading additional libraries, and organising the data for analysis.
These are additional libraries used for specific data analyses.
library ("plotly") # Used for creating plots
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library ("ggplot2") # Used for creating charts
library ("mongolite") # Used for retrieving data from mongoDB
library ("effsize") # Used for calculating Vargha-Delaney A effect size
library ("lsr") # Used for calculating Cohen's d effect size
library ("pwr") # Used for calculating the statistical power of tests
library ("gmodels") # Used for calculating the Fisher's exact test
# The data collected from the students is stored in a MongoDB database, one
# collection per data source, to facilitate data analysis. The JSON file for
# creating the collection should be available along with this analysis.
# All three collections live in the same database on the same server, so a
# small helper opens a collection by name.
openCollection <- function(name) {
  mongo(
    collection = name,
    db = "cloud-platform-experiment-2017",
    url = "mongodb://localhost:27018"
  )
}
dataset <- openCollection("dataset")
demographics <- openCollection("demographics")
feedback <- openCollection("feedback")
Mongo variables
# Show the methods exposed by the 'dataset' collection handle.
print(dataset)
## <Mongo collection> 'dataset'
## $aggregate(pipeline = "{}", options = "{\"allowDiskUse\":true}", handler = NULL, pagesize = 1000)
## $count(query = "{}")
## $distinct(key, query = "{}")
## $drop()
## $export(con = stdout(), bson = FALSE)
## $find(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0, handler = NULL, pagesize = 1000)
## $import(con, bson = FALSE)
## $index(add = NULL, remove = NULL)
## $info()
## $insert(data, pagesize = 1000, ...)
## $iterate(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0)
## $mapreduce(map, reduce, query = "{}", sort = "{}", limit = 0, out = NULL, scope = NULL)
## $remove(query, just_one = FALSE)
## $rename(name, db = NULL)
## $update(query, update = "{\"$set\":{}}", upsert = FALSE, multiple = FALSE)
# List every demographics record (an empty query matches all documents).
demographics$find(query = "{}")
## id age gender cloudKnowledge createAWSvm createAZUREvm
## 1 8 22 Male 1 No No
## 2 7 23 Male 1 No No
## 3 5 19 Male 2 No No
## 4 6 23 Prefer not to say 2 No No
## 5 4 22 Male 3 No Yes
## 6 2 22 Male 2 No Yes
## 7 11 20 Male 1 Yes Yes
## 8 1 21 Male 1 Yes Yes
## 9 10 20 Male 2 No No
## 10 3 20 Prefer not to say 1 Yes No
## 11 9 19 Male 1 Yes Yes
# Frequency of each participant age.
age <- demographics$find(fields = "{\"age\": 1, \"_id\": 0}")
table(age)
## age
## 19 20 21 22 23
## 2 3 1 3 2
# Pie chart of the same frequencies, one slice per age.
# NOTE(review): `colors` is not assigned anywhere in the visible script; unless
# it is defined elsewhere it resolves to the grDevices::colors function rather
# than a palette -- confirm.
ageCounts <- data.frame(table(age))
plot_ly(
  ageCounts,
  type = "pie",
  labels = ~age,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)
# Frequency of each reported gender.
gender <- demographics$find(fields = "{\"gender\": 1, \"_id\": 0}")
table(gender)
## gender
## Male Prefer not to say
## 9 2
# Pie chart of the same frequencies, one slice per gender.
# NOTE(review): `colors` is not assigned anywhere in the visible script; unless
# it is defined elsewhere it resolves to the grDevices::colors function rather
# than a palette -- confirm.
genderCounts <- data.frame(table(gender))
plot_ly(
  genderCounts,
  type = "pie",
  labels = ~gender,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)
# Frequency of each self-reported cloud knowledge level.
cloudKnowledge <- demographics$find(
  fields = "{\"cloudKnowledge\": 1, \"_id\": 0}"
)
table(cloudKnowledge)
## cloudKnowledge
## 1 2 3
## 6 4 1
# Horizontal bar chart: one bar per knowledge level, bar length = frequency.
knowledgeCounts <- data.frame(table(cloudKnowledge))
barColour <- "rgba(176,196,222, 0.6)"
plot_ly(
  knowledgeCounts,
  type = "bar",
  orientation = "h",
  x = ~Freq,
  y = ~cloudKnowledge,
  marker = list(
    color = barColour,
    line = list(color = barColour, width = 1)
  )
)
# How many participants had created a virtual machine on AWS before.
createAWSvm <- demographics$find(fields = "{\"createAWSvm\": 1, \"_id\": 0}")
table(createAWSvm)
## createAWSvm
## No Yes
## 7 4
# Pie chart of the same frequencies.
# NOTE(review): `colors` is not assigned anywhere in the visible script; unless
# it is defined elsewhere it resolves to the grDevices::colors function rather
# than a palette -- confirm.
awsVmCounts <- data.frame(table(createAWSvm))
plot_ly(
  awsVmCounts,
  type = "pie",
  labels = ~createAWSvm,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)
# How many participants had created a virtual machine on Azure before.
createAZUREvm <- demographics$find(
  fields = "{\"createAZUREvm\": 1, \"_id\": 0}"
)
table(createAZUREvm)
## createAZUREvm
## No Yes
## 6 5
# Pie chart of the same frequencies.
# NOTE(review): `colors` is not assigned anywhere in the visible script; unless
# it is defined elsewhere it resolves to the grDevices::colors function rather
# than a palette -- confirm.
azureVmCounts <- data.frame(table(createAZUREvm))
plot_ly(
  azureVmCounts,
  type = "pie",
  labels = ~createAZUREvm,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)
# Previous VM-creation experience on both platforms, one row per participant.
createAWSandAZUREvm <- demographics$find(
  fields = "{\"createAZUREvm\": 1, \"createAWSvm\": 1, \"_id\": 0}"
)
print(createAWSandAZUREvm)
## createAWSvm createAZUREvm
## 1 No No
## 2 No No
## 3 No No
## 4 No No
## 5 No Yes
## 6 No Yes
## 7 Yes Yes
## 8 Yes Yes
## 9 No No
## 10 Yes No
## 11 Yes Yes
# Cross-tabulation of AWS experience against Azure experience.
table(createAWSandAZUREvm)
## createAZUREvm
## createAWSvm No Yes
## No 5 2
## Yes 1 3
# Number of observations recorded for each platform.
awsDataPoints <- dataset$count(query = "{\"platform\": \"AWS\"}")
print(paste("AWS data points", awsDataPoints))
## [1] "AWS data points 76"
azureDataPoints <- dataset$count(query = "{\"platform\": \"Azure\"}")
print(paste("Azure data points", azureDataPoints))
## [1] "Azure data points 76"
Plot
# Box plot of the time (seconds) per platform over all observations.
platformSeconds <- dataset$find(
  fields = "{\"platform\": 1, \"seconds\": 1, \"_id\": 0}"
)
boxplotTheme <- theme(
  # text = element_text(size=20),
  plot.title = element_text(hjust = 0.5, margin = margin(15, 0, 15, 0)),
  axis.title.y = element_text(margin = margin(0, 20, 0, 0)),
  axis.title.x = element_text(margin = margin(20, 0, 0, 0)),
  legend.position = "bottom",
  legend.box.background = element_rect(),
  legend.box.margin = margin(5, 5, 5, 5),
  legend.key.width = unit(1, "cm")
)
ggplot(data = platformSeconds) +
  geom_boxplot(aes(x = platform, y = seconds, colour = platform)) +
  boxplotTheme +
  scale_x_discrete(labels = c("AWS", "Azure")) +
  labs(y = "Efficiency", x = "Platform", title = "Median Efficiency") +
  scale_y_continuous(breaks = seq(from = 0, to = 1300, by = 150)) +
  # Added last, so it overrides the legend position set in boxplotTheme:
  # the legend is hidden.
  theme(legend.position = "none")
Descriptive Statistics
# Five-number summary (plus mean) of the seconds recorded on each platform.
secondsOnly <- "{\"seconds\": 1, \"_id\": 0}"
awsSeconds <- dataset$find(
  query = "{\"platform\": \"AWS\"}",
  fields = secondsOnly
)$seconds
azureSeconds <- dataset$find(
  query = "{\"platform\": \"Azure\"}",
  fields = secondsOnly
)$seconds
print("AWS Median")
## [1] "AWS Median"
summary(awsSeconds)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30.0 110.5 171.0 222.3 231.5 1104.0
print("Azure Median")
## [1] "Azure Median"
summary(azureSeconds)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 84.0 167.5 229.0 318.3 375.2 1276.0
Inferential Statistics
# Paired Wilcoxon signed-rank test on the seconds spent per platform.
#
# A paired test compares the i-th element of x with the i-th element of y, so
# both vectors must describe the same (participant, task) combination at each
# position. Fetching the two platforms independently gives no such guarantee
# (results are unsorted, and the per-task / per-participant counts show that
# each platform lacks a different observation). The observations are therefore
# aligned explicitly and only complete pairs are kept.
# NOTE(review): the printed results that follow were produced by the previous,
# unaligned code and must be regenerated.
observations <- dataset$find(
  fields = "{\"participant\": 1, \"task\": 1, \"platform\": 1, \"seconds\": 1, \"_id\": 0}"
)
pairColumns <- c("participant", "task", "seconds")
pairedSeconds <- merge(
  observations[observations$platform == "AWS", pairColumns],
  observations[observations$platform == "Azure", pairColumns],
  by = c("participant", "task"), # inner join: drops unmatched observations
  suffixes = c(".aws", ".azure")
)
awsSeconds <- pairedSeconds$seconds.aws
azureSeconds <- pairedSeconds$seconds.azure
wilcox.test(
  x = awsSeconds,
  y = azureSeconds,
  mu = 0,
  paired = TRUE,
  conf.int = TRUE
)
##
## Wilcoxon signed rank test with continuity correction
##
## data: awsSeconds and azureSeconds
## V = 617, p-value = 1.2e-05
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -115.99996 -39.99994
## sample estimates:
## (pseudo)median
## -73.49998
Effect Size
# Vargha-Delaney A effect size between the two platforms. VD.A is called with
# AWS first; the complement (1 - estimate) is what gets reported.
secondsOnly <- "{\"seconds\": 1, \"_id\": 0}"
awsSeconds <- dataset$find(
  query = "{\"platform\": \"AWS\"}",
  fields = secondsOnly
)$seconds
azureSeconds <- dataset$find(
  query = "{\"platform\": \"Azure\"}",
  fields = secondsOnly
)$seconds
overallEffect <- VD.A(awsSeconds, azureSeconds)
print(1 - overallEffect$estimate)
## [1] 0.6862881
Statistical Power
# Cohen's d for paired samples, used as input for the power calculation.
#
# The "paired" method works on element-wise differences, so both vectors must
# refer to the same (participant, task) combination at each position. The
# observations are aligned explicitly and incomplete pairs are dropped instead
# of relying on the order in which two independent queries return their rows.
# NOTE(review): the printed value that follows was produced by the previous,
# unaligned code and must be regenerated.
observations <- dataset$find(
  fields = "{\"participant\": 1, \"task\": 1, \"platform\": 1, \"seconds\": 1, \"_id\": 0}"
)
pairColumns <- c("participant", "task", "seconds")
pairedSeconds <- merge(
  observations[observations$platform == "AWS", pairColumns],
  observations[observations$platform == "Azure", pairColumns],
  by = c("participant", "task"), # inner join: drops unmatched observations
  suffixes = c(".aws", ".azure")
)
awsSeconds <- pairedSeconds$seconds.aws
azureSeconds <- pairedSeconds$seconds.azure
# Identifying Cohen's d for statistical power calculation
ef.d <- cohensD(awsSeconds, azureSeconds, method = "paired")
print(ef.d)
## [1] 0.4382009
# Running power test
# Running power test
# For a paired design n is the number of *pairs*, i.e. the number of
# (participant, task) combinations observed on BOTH platforms. Counting the
# AWS observations overstates it whenever an observation has no counterpart on
# the other platform, so the complete pairs are counted explicitly.
# NOTE(review): the printed results that follow were produced with n taken from
# the AWS observation count and must be regenerated.
observedCells <- dataset$find(
  fields = "{\"participant\": 1, \"task\": 1, \"platform\": 1, \"_id\": 0}"
)
cellKey <- c("participant", "task")
completePairs <- merge(
  observedCells[observedCells$platform == "AWS", cellKey],
  observedCells[observedCells$platform == "Azure", cellKey],
  by = cellKey
)
pwr.t.test(
  n = nrow(completePairs),
  sig.level = 0.05,
  d = ef.d,
  type = "paired"
)
##
## Paired t test power calculation
##
## n = 76
## d = 0.4382009
## sig.level = 0.05
## power = 0.9649161
## alternative = two.sided
##
## NOTE: n is number of *pairs*
Plot
# Box plots of the time (seconds) per platform, one panel per task.
taskSeconds <- dataset$find(
  fields = "{\"task\": 1, \"platform\": 1, \"seconds\": 1, \"_id\": 0}"
)
boxplotTheme <- theme(
  # text = element_text(size=20),
  plot.title = element_text(hjust = 0.5, margin = margin(15, 0, 15, 0)),
  axis.title.y = element_text(margin = margin(0, 20, 0, 0)),
  axis.title.x = element_text(margin = margin(20, 0, 0, 0)),
  legend.position = "bottom",
  legend.box.background = element_rect(),
  legend.box.margin = margin(5, 5, 5, 5),
  legend.key.width = unit(1, "cm")
)
ggplot(data = taskSeconds) +
  geom_boxplot(aes(x = platform, y = seconds, colour = platform)) +
  boxplotTheme +
  scale_x_discrete(labels = c("AWS", "Azure")) +
  labs(y = "Efficiency", title = "Median Efficiency by Task") +
  # Added after boxplotTheme, so the legend ends up hidden.
  theme(legend.position = "none") +
  # Each panel gets its own axis ranges.
  facet_wrap(~ task, scales = "free")
Descriptive Statistics
# For every task: summary of the seconds on each platform, followed by the
# difference between the two medians (AWS minus Azure).
secondsOnly <- "{\"seconds\": 1, \"_id\": 0}"
for (task.var in 1:7) {
  taskPrefix <- paste0("{\"task\":", task.var, ", \"platform\": ")
  awsRows <- dataset$find(
    query = paste0(taskPrefix, "\"AWS\"}"),
    fields = secondsOnly
  )
  azureRows <- dataset$find(
    query = paste0(taskPrefix, "\"Azure\"}"),
    fields = secondsOnly
  )
  awsSummary <- summary(awsRows$seconds)
  azureSummary <- summary(azureRows$seconds)

  print(paste0("Task ", task.var, " AWS Summary = "))
  print(awsSummary)
  print(paste0("Task ", task.var, " Azure Summary = "))
  print(azureSummary)

  # The median is the third entry of a numeric summary.
  medianGap <- awsSummary[["Median"]] - azureSummary[["Median"]]
  print(paste0("Difference task ", task.var, " = ", medianGap))
}
## [1] "Task 1 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30.0 87.0 138.0 169.8 199.5 567.0
## [1] "Task 1 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 84.0 151.5 181.0 180.0 212.0 288.0
## [1] "Difference task 1 = -43"
## [1] "Task 2 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 109.0 171.0 183.0 253.2 270.0 787.0
## [1] "Task 2 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 107.0 191.5 244.0 346.5 346.5 1276.0
## [1] "Difference task 2 = -61"
## [1] "Task 3 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 42.0 106.5 142.0 172.1 176.0 433.0
## [1] "Task 3 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 111.0 148.8 197.0 291.9 326.8 881.0
## [1] "Difference task 3 = -55"
## [1] "Task 4 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 93.0 129.5 180.0 182.4 219.0 290.0
## [1] "Task 4 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 150.0 185.5 220.0 272.2 265.0 605.0
## [1] "Difference task 4 = -40"
## [1] "Task 5 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 64.0 101.0 180.0 220.5 249.0 595.0
## [1] "Task 5 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 150.0 233.5 276.0 373.1 469.0 827.0
## [1] "Difference task 5 = -96"
## [1] "Task 6 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 60.0 77.5 120.0 147.7 169.5 400.0
## [1] "Task 6 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 127.0 160.5 220.0 271.3 339.5 534.0
## [1] "Difference task 6 = -100"
## [1] "Task 7 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 120.0 175.5 236.0 406.6 434.0 1104.0
## [1] "Task 7 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 140.0 220.5 427.0 490.9 716.0 1037.0
## [1] "Difference task 7 = -191"
Effect Size
# Vargha-Delaney A effect size per task. VD.A is called with AWS first and the
# complement (1 - estimate) is reported.
secondsOnly <- "{\"seconds\": 1, \"_id\": 0}"
for (task.var in 1:7) {
  taskPrefix <- paste0("{\"task\":", task.var, ", \"platform\": ")
  awsRows <- dataset$find(
    query = paste0(taskPrefix, "\"AWS\"}"),
    fields = secondsOnly
  )
  azureRows <- dataset$find(
    query = paste0(taskPrefix, "\"Azure\"}"),
    fields = secondsOnly
  )
  taskEffect <- VD.A(awsRows$seconds, azureRows$seconds)
  print(paste0("Task ", task.var, " effect size = ", 1 - taskEffect$estimate))
}
## [1] "Task 1 effect size = 0.661157024793388"
## [1] "Task 2 effect size = 0.628099173553719"
## [1] "Task 3 effect size = 0.709090909090909"
## [1] "Task 4 effect size = 0.704545454545455"
## [1] "Task 5 effect size = 0.768595041322314"
## [1] "Task 6 effect size = 0.809917355371901"
## [1] "Task 7 effect size = 0.603305785123967"
Plot
# Box plots of the time (seconds) per platform, one panel per participant.
participantSeconds <- dataset$find(
  fields = "{\"participant\": 1, \"platform\": 1, \"seconds\": 1, \"_id\": 0}"
)
boxplotTheme <- theme(
  # text = element_text(size=20),
  plot.title = element_text(hjust = 0.5, margin = margin(15, 0, 15, 0)),
  axis.title.y = element_text(margin = margin(0, 20, 0, 0)),
  axis.title.x = element_text(margin = margin(20, 0, 0, 0)),
  legend.position = "bottom",
  legend.box.background = element_rect(),
  legend.box.margin = margin(5, 5, 5, 5),
  legend.key.width = unit(1, "cm")
)
ggplot(data = participantSeconds) +
  geom_boxplot(aes(x = platform, y = seconds, colour = platform)) +
  boxplotTheme +
  scale_x_discrete(labels = c("AWS", "Azure")) +
  labs(y = "Efficiency", title = "Median Efficiency by Participant") +
  # Added after boxplotTheme, so the legend ends up hidden.
  theme(legend.position = "none") +
  # Each panel gets its own axis ranges.
  facet_wrap(~ participant, scales = "free")
Descriptive Statistics
# For every participant: summary of the seconds on each platform, followed by
# the difference between the two medians (AWS minus Azure).
# NOTE(review): the printed lines that follow still show the old
# "Difference participantN" label (no space) and must be regenerated.
for (participant.var in 1:11) {
  queryStringAWS <- paste0("{\"participant\":", participant.var, ", \"platform\": \"AWS\"}")
  queryStringAzure <- paste0("{\"participant\":", participant.var, ", \"platform\": \"Azure\"}")
  efficiencyAWS <- dataset$find(query = queryStringAWS, fields = "{\"seconds\": 1, \"_id\": 0}")
  efficiencyAzure <- dataset$find(query = queryStringAzure, fields = "{\"seconds\": 1, \"_id\": 0}")

  print(paste0("Participant ", participant.var, " AWS Summary = "))
  summaryEfficiencyAWS <- summary(efficiencyAWS$seconds)
  print(summaryEfficiencyAWS)

  print(paste0("Participant ", participant.var, " Azure Summary = "))
  summaryEfficiencyAzure <- summary(efficiencyAzure$seconds)
  print(summaryEfficiencyAzure)

  # Select the median by name rather than by position, and separate the label
  # from the participant number (the space was missing, unlike in the
  # per-task report).
  medianDifference <- summaryEfficiencyAWS[["Median"]] - summaryEfficiencyAzure[["Median"]]
  print(paste0("Difference participant ", participant.var, " = ", medianDifference))
}
## [1] "Participant 1 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 150.0 182.5 210.0 250.7 290.0 450.0
## [1] "Participant 1 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 152.0 160.0 163.0 216.1 260.5 357.0
## [1] "Difference participant1 = 47"
## [1] "Participant 2 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 120.0 133.0 195.0 227.6 306.0 400.0
## [1] "Participant 2 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 140.0 223.0 276.0 400.3 529.5 881.0
## [1] "Difference participant2 = -81"
## [1] "Participant 3 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 85.0 171.0 216.0 388.9 524.5 1030.0
## [1] "Participant 3 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 200.0 221.0 300.0 458.7 496.5 1276.0
## [1] "Difference participant3 = -84"
## [1] "Participant 4 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 108.0 130.0 220.0 206.9 230.0 400.0
## [1] "Participant 4 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 145.0 170.0 210.0 257.4 223.5 660.0
## [1] "Difference participant4 = 10"
## [1] "Participant 5 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 70.0 103.0 109.0 112.9 127.5 150.0
## [1] "Participant 5 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 169.0 197.5 231.0 248.9 242.5 462.0
## [1] "Difference participant5 = -122"
## [1] "Participant 6 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 175 189 290 425 514 1104
## [1] "Participant 6 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 219.0 235.5 326.0 382.0 415.5 827.0
## [1] "Difference participant6 = -36"
## [1] "Participant 7 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 93.0 118.0 144.0 153.4 189.5 222.0
## [1] "Participant 7 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 106.0 130.5 203.0 299.7 377.0 774.0
## [1] "Difference participant7 = -59"
## [1] "Participant 8 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30.0 53.0 69.0 154.7 230.0 418.0
## [1] "Participant 8 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 143.0 162.0 265.0 328.3 323.0 920.0
## [1] "Difference participant8 = -196"
## [1] "Participant 9 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 149.0 168.5 207.0 269.9 314.5 567.0
## [1] "Participant 9 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 113.0 180.0 281.0 283.6 333.5 564.0
## [1] "Difference participant9 = -74"
## [1] "Participant 10 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 77.0 96.0 108.0 134.2 165.0 236.0
## [1] "Participant 10 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 127.0 194.5 234.0 365.6 386.0 1037.0
## [1] "Difference participant10 = -126"
## [1] "Participant 11 AWS Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 55.0 70.0 90.0 108.4 136.5 201.0
## [1] "Participant 11 Azure Summary = "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 84.0 114.0 142.5 251.3 357.8 605.0
## [1] "Difference participant11 = -52.5"
Effect Size
# Vargha-Delaney A effect size per participant. VD.A is called with AWS first
# and the complement (1 - estimate) is reported.
secondsOnly <- "{\"seconds\": 1, \"_id\": 0}"
for (participant.var in 1:11) {
  participantPrefix <- paste0(
    "{\"participant\":", participant.var, ", \"platform\": "
  )
  awsRows <- dataset$find(
    query = paste0(participantPrefix, "\"AWS\"}"),
    fields = secondsOnly
  )
  azureRows <- dataset$find(
    query = paste0(participantPrefix, "\"Azure\"}"),
    fields = secondsOnly
  )
  participantEffect <- VD.A(awsRows$seconds, azureRows$seconds)
  print(paste0(
    "Participant ", participant.var,
    " effect size = ", 1 - participantEffect$estimate
  ))
}
## [1] "Participant 1 effect size = 0.387755102040816"
## [1] "Participant 2 effect size = 0.755102040816326"
## [1] "Participant 3 effect size = 0.673469387755102"
## [1] "Participant 4 effect size = 0.540816326530612"
## [1] "Participant 5 effect size = 1"
## [1] "Participant 6 effect size = 0.561224489795918"
## [1] "Participant 7 effect size = 0.693877551020408"
## [1] "Participant 8 effect size = 0.775510204081633"
## [1] "Participant 9 effect size = 0.510204081632653"
## [1] "Participant 10 effect size = 0.857142857142857"
## [1] "Participant 11 effect size = 0.738095238095238"
Plot
# Stacked bar chart of correct vs. incorrect answers per platform.
# The counts are derived from the fetched observations instead of being typed
# in by hand, so the chart cannot drift from the data in the collection.
correctAWS <- dataset$find(
  query = "{\"platform\": \"AWS\"}",
  fields = "{\"correct\": 1, \"_id\": 0}"
)
correctAzure <- dataset$find(
  query = "{\"platform\": \"Azure\"}",
  fields = "{\"correct\": 1, \"platform\": 1, \"_id\": 0}"
)
platforms <- c("AWS", "Azure")
# The 'correct' field holds "yes" / "no" (see the cross-tabulation of the same
# field reported in the descriptive statistics).
correct <- c(
  sum(correctAWS$correct == "yes"),
  sum(correctAzure$correct == "yes")
)
incorrect <- c(
  sum(correctAWS$correct == "no"),
  sum(correctAzure$correct == "no")
)
data <- data.frame(platforms, correct, incorrect)
plot_ly(
  data,
  x = ~platforms,
  y = ~correct,
  type = "bar",
  name = "Correct"
) %>%
  add_trace(
    y = ~incorrect,
    name = "Incorrect"
  ) %>%
  layout(
    yaxis = list(title = "Count"),
    barmode = "stack"
  )
Descriptive Statistics
# Cross-tabulation of platform against answer correctness.
correct <- dataset$find(
  fields = "{\"correct\": 1, \"platform\": 1, \"_id\": 0}"
)
table(correct)
## correct
## platform no yes
## AWS 6 70
## Azure 13 63
Inferential Statistics & Effect Size
# Fisher's exact test on the platform x correctness contingency table
# (chi-square output disabled, expected counts shown).
correct <- dataset$find(
  fields = "{\"correct\": 1, \"platform\": 1, \"_id\": 0}"
)
correctnessTable <- table(correct)
CrossTable(
  correctnessTable,
  expected = TRUE,
  chisq = FALSE,
  fisher = TRUE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Expected N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 152
##
##
## | correct
## platform | no | yes | Row Total |
## -------------|-----------|-----------|-----------|
## AWS | 6 | 70 | 76 |
## | 9.500 | 66.500 | |
## | 1.289 | 0.184 | |
## | 0.079 | 0.921 | 0.500 |
## | 0.316 | 0.526 | |
## | 0.039 | 0.461 | |
## -------------|-----------|-----------|-----------|
## Azure | 13 | 63 | 76 |
## | 9.500 | 66.500 | |
## | 1.289 | 0.184 | |
## | 0.171 | 0.829 | 0.500 |
## | 0.684 | 0.474 | |
## | 0.086 | 0.414 | |
## -------------|-----------|-----------|-----------|
## Column Total | 19 | 133 | 152 |
## | 0.125 | 0.875 | |
## -------------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 2.947368 d.f. = 1 p = 0.08601752
##
## Pearson's Chi-squared test with Yates' continuity correction
## ------------------------------------------------------------
## Chi^2 = 2.165414 d.f. = 1 p = 0.1411466
##
##
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Sample estimate odds ratio: 0.4177346
##
## Alternative hypothesis: true odds ratio is not equal to 1
## p = 0.1396491
## 95% confidence interval: 0.1225451 1.263983
##
## Alternative hypothesis: true odds ratio is less than 1
## p = 0.06982455
## 95% confidence interval: 0 1.08485
##
## Alternative hypothesis: true odds ratio is greater than 1
## p = 0.97624
## 95% confidence interval: 0.1481983 Inf
##
##
##
Descriptive Statistics
# Correct / incorrect answers per task, separately for each platform.
taskAndCorrect <- "{\"correct\": 1, \"task\": 1, \"_id\": 0}"
correctAWS <- dataset$find(
  query = "{\"platform\": \"AWS\"}",
  fields = taskAndCorrect
)
correctAzure <- dataset$find(
  query = "{\"platform\": \"Azure\"}",
  fields = taskAndCorrect
)
print("Correct AWS")
## [1] "Correct AWS"
table(correctAWS)
## correct
## task no yes
## 1 0 11
## 2 1 10
## 3 0 11
## 4 2 8
## 5 1 10
## 6 0 11
## 7 2 9
print("Correct Azure")
## [1] "Correct Azure"
table(correctAzure)
## correct
## task no yes
## 1 1 10
## 2 1 10
## 3 2 8
## 4 1 10
## 5 3 8
## 6 3 8
## 7 2 9
# Both tables side by side: AWS columns first, then Azure.
cbind(table(correctAWS), table(correctAzure))
## no yes no yes
## 1 0 11 1 10
## 2 1 10 1 10
## 3 0 11 2 8
## 4 2 8 1 10
## 5 1 10 3 8
## 6 0 11 3 8
## 7 2 9 2 9
Descriptive Statistics
# Correct / incorrect answers per participant, separately for each platform.
participantAndCorrect <- "{\"correct\": 1, \"participant\": 1, \"_id\": 0}"
correctAWS <- dataset$find(
  query = "{\"platform\": \"AWS\"}",
  fields = participantAndCorrect
)
correctAzure <- dataset$find(
  query = "{\"platform\": \"Azure\"}",
  fields = participantAndCorrect
)
print("Correct AWS")
## [1] "Correct AWS"
table(correctAWS)
## correct
## participant no yes
## 1 0 7
## 2 2 5
## 3 0 7
## 4 0 7
## 5 1 6
## 6 3 4
## 7 0 7
## 8 0 7
## 9 0 7
## 10 0 6
## 11 0 7
print("Correct Azure")
## [1] "Correct Azure"
table(correctAzure)
## correct
## participant no yes
## 1 0 7
## 2 6 1
## 3 2 5
## 4 0 7
## 5 2 5
## 6 1 6
## 7 0 7
## 8 0 7
## 9 0 7
## 10 2 5
## 11 0 6
# Both tables side by side: AWS columns first, then Azure.
cbind(table(correctAWS), table(correctAzure))
## no yes no yes
## 1 0 7 0 7
## 2 2 5 6 1
## 3 0 7 2 5
## 4 0 7 0 7
## 5 1 6 2 5
## 6 3 4 1 6
## 7 0 7 0 7
## 8 0 7 0 7
## 9 0 7 0 7
## 10 0 6 2 5
## 11 0 7 0 6
Plot
# Which platform each participant named as having the best GUI.
gui <- feedback$find(fields = "{\"bestGUI\": 1, \"_id\": 0}")
table(gui)
## gui
## AWS Azure
## 7 4
# Pie chart of the same frequencies.
# NOTE(review): `colors` is not assigned anywhere in the visible script; unless
# it is defined elsewhere it resolves to the grDevices::colors function rather
# than a palette -- confirm.
guiCounts <- data.frame(table(gui))
plot_ly(
  guiCounts,
  type = "pie",
  labels = ~gui,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)
Descriptive Statistics
# GUI preference next to each participant's previous VM-creation experience.
# The demographics collection identifies participants by 'id' (it has no
# 'participant' field), so sorting it by "participant" left the rows in their
# stored order and cbind() paired them with the wrong feedback rows. Both
# sides are now keyed and joined on 'id'.
# NOTE(review): the printed table that follows was produced by the previous,
# misaligned code and must be regenerated.
gui <- feedback$find(
  fields = "{\"bestGUI\": 1, \"id\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)
createAWSandAZUREvm <- demographics$find(
  fields = "{\"id\": 1, \"createAZUREvm\": 1, \"createAWSvm\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)
merge(gui, createAWSandAZUREvm, by = "id")
## id bestGUI createAWSvm createAZUREvm
## 1 1 Azure No No
## 2 2 Azure No No
## 3 3 AWS No No
## 4 4 AWS No No
## 5 5 AWS No Yes
## 6 6 Azure No Yes
## 7 7 AWS Yes Yes
## 8 8 AWS Yes Yes
## 9 9 Azure No No
## 10 10 AWS Yes No
## 11 11 AWS Yes Yes
Plot
# Which platform each participant found easiest to use.
ease <- feedback$find(fields = "{\"easiestPlatform\": 1, \"_id\": 0}")
print(ease)
## easiestPlatform
## 1 AWS
## 2 AWS
## 3 Could not observe significant differences
## 4 AWS
## 5 AWS
## 6 AWS
## 7 AWS
## 8 AWS
## 9 AWS
## 10 Azure
## 11 AWS
# Pie chart of the answer frequencies.
# NOTE(review): `colors` is not assigned anywhere in the visible script; unless
# it is defined elsewhere it resolves to the grDevices::colors function rather
# than a palette -- confirm.
easeCounts <- data.frame(table(ease))
plot_ly(
  easeCounts,
  type = "pie",
  labels = ~ease,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)
Descriptive Statistics
# Easiest platform next to each participant's previous VM-creation experience.
# The demographics collection identifies participants by 'id' (it has no
# 'participant' field), so sorting it by "participant" left the rows in their
# stored order and cbind() paired them with the wrong feedback rows. Both
# sides now carry 'id' and are joined on it.
# NOTE(review): the printed table that follows was produced by the previous,
# misaligned code and must be regenerated.
ease <- feedback$find(
  fields = "{\"easiestPlatform\": 1, \"id\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)
createAWSandAZUREvm <- demographics$find(
  fields = "{\"id\": 1, \"createAZUREvm\": 1, \"createAWSvm\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)
merge(ease, createAWSandAZUREvm, by = "id")
## easiestPlatform createAWSvm createAZUREvm
## 1 AWS No No
## 2 AWS No No
## 3 AWS No No
## 4 AWS No No
## 5 AWS No Yes
## 6 Azure No Yes
## 7 Could not observe significant differences Yes Yes
## 8 AWS Yes Yes
## 9 AWS No No
## 10 AWS Yes No
## 11 AWS Yes Yes
Plot
# Which platform each participant named as their favourite.
preference <- feedback$find(fields = "{\"favoritePlatform\": 1, \"_id\": 0}")
print(preference)
## favoritePlatform
## 1 AWS
## 2 AWS
## 3 AWS
## 4 AWS
## 5 AWS
## 6 AWS
## 7 AWS
## 8 Azure
## 9 AWS
## 10 Azure
## 11 AWS
# Pie chart of the answer frequencies.
# NOTE(review): `colors` is not assigned anywhere in the visible script; unless
# it is defined elsewhere it resolves to the grDevices::colors function rather
# than a palette -- confirm.
preferenceCounts <- data.frame(table(preference))
plot_ly(
  preferenceCounts,
  type = "pie",
  labels = ~preference,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)
Descriptive Statistics
# Favourite platform next to each participant's previous VM-creation
# experience.
# The demographics collection identifies participants by 'id' (it has no
# 'participant' field), so sorting it by "participant" left the rows in their
# stored order and cbind() paired them with the wrong feedback rows. Both
# sides now carry 'id' and are joined on it.
# NOTE(review): the printed table that follows was produced by the previous,
# misaligned code and must be regenerated.
preference <- feedback$find(
  fields = "{\"favoritePlatform\": 1, \"id\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)
createAWSandAZUREvm <- demographics$find(
  fields = "{\"id\": 1, \"createAZUREvm\": 1, \"createAWSvm\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)
merge(preference, createAWSandAZUREvm, by = "id")
## favoritePlatform createAWSvm createAZUREvm
## 1 AWS No No
## 2 AWS No No
## 3 AWS No No
## 4 AWS No No
## 5 AWS No Yes
## 6 Azure No Yes
## 7 AWS Yes Yes
## 8 AWS Yes Yes
## 9 Azure No No
## 10 AWS Yes No
## 11 AWS Yes Yes